#This script executes regressions to estimate 
#associations between first-generation prenatal exposure to nonattainment 
#and first-generation family structure outcomes (for mothers).

rm(list=ls())
gc()
library(reldist)
library(ineq)
library(stargazer)
library(data.table)
library(dplyr)
library(dtplyr)
library(ggplot2)
library(stringdist)
library(lfe)
library(haven)
library(readr)
library(broom)
library(tidylog)
library(rdrobust)

# Set the working directory
setwd("/projects/opp_env/caa1970/")

# Source external R scripts for custom functions
source("Code/Child_Outcomes/child_outcomes_functions.R")
source("Code/rounding.r")

# Load the first-generation analysis dataset with ACS link.
load("Data/first_gen_analysis_data_jepmicro_acs.rda")

# Load the first-generation analysis dataset
load("Data/first_gen_analysis_data_jepmicro.rda")

Table_B6 <- data.frame()

TabB6_A1 <- felm(any_link ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | (all_fullterm~nonattainment_infant)  |county_factor, 
                 data=parent_piks%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_A1)

TabB6_A1$N
temp_n<-  rounding_rules_counts(TabB6_A1$N)
temp_ctrl <- signif(mean((parent_piks %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$any_link, na.rm=T ),4)
temp_f <-  signif(TabB6_A1$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabB6_A1) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "B2", column ="A1" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_A2 <- felm(total_any_links ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | (all_fullterm~nonattainment_infant)  |county_factor, 
                 data=parent_piks%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_A2)
TabB6_A2$N
temp_n<-  rounding_rules_counts(TabB6_A2$N)
temp_ctrl <- signif(mean((parent_piks %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$total_any_links, na.rm=T ),4)
temp_f <-  signif(TabB6_A2$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabB6_A2) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "B2", column ="A2" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_A3 <- felm(second_gen ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | (all_fullterm~nonattainment_infant)  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975,AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_age_min), !is.na(lfpr), !is.na(PI_PC)))
summary(TabB6_A3)
TabB6_A3$N

temp_n<-  rounding_rules_counts(TabB6_A3$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$second_gen, na.rm=T ),4)
temp_f <-  signif(TabB6_A3$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabB6_A3) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "B2", column ="A3" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_A4 <- felm(parent_age_min ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | (all_fullterm~nonattainment_infant)  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_A4)
TabB6_A4$N
temp_n<-  rounding_rules_counts(TabB6_A4$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_age_min), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$parent_age_min, na.rm=T ),4)
temp_f <-  signif(TabB6_A4$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabB6_A4) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "B2", column ="A4" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_A5 <- felm(married ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | (all_fullterm~nonattainment_infant)  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1,real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_age_min), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_A5)
TabB6_A5$N

temp_n<-  rounding_rules_counts(TabB6_A5$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44,parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(parent_age_min), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$married, na.rm=T ),4)
temp_f <-  signif(TabB6_A5$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabB6_A5) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "B2", column ="A5" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset


TabB6_A6 <- felm(divorced ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | (all_fullterm~nonattainment_infant)  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44,parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm),!is.na(parent_age_min), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_A6)
TabB6_A6$N
temp_n<-  rounding_rules_counts(TabB6_A6$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44,parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(divorced_new), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$divorced, na.rm=T ),4)
temp_f <-  signif(TabB6_A6$stage1$iv1fstat[[1]]["F"],4)
temp_results <- tidy(TabB6_A6) %>% filter(term == "`all_fullterm(fit)`" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, fstat = temp_f,  table = "B2", column ="A6" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset


TabB6_B1 <- felm(any_link ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm+nonattainment_infant |state_year+county_factor+year_factor+month+survey_year_by_year | 0  |county_factor, 
                 data=parent_piks%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_B1)
TabB6_B1$N
temp_n<-  rounding_rules_counts(TabB6_B1$N)
temp_ctrl <- signif(mean((parent_piks %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$any_link, na.rm=T ),4)
temp_results <- tidy(TabB6_B1) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl,  table = "B2", column ="B1" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_B2 <- felm(total_any_links ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm+nonattainment_infant |state_year+county_factor+year_factor+month+survey_year_by_year | 0  |county_factor, 
                 data=parent_piks%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_B2)
TabB6_B2$N
temp_n<-  rounding_rules_counts(TabB6_B2$N)
temp_ctrl <- signif(mean((parent_piks %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$total_any_links, na.rm=T ),4)
temp_results <- tidy(TabB6_B2) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl,  table = "B2", column ="B2" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_B3 <- felm(second_gen ~nonattainment_infant + factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm |state_year+county_factor+year_factor+month+survey_year_by_year | 0 |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(all_fullterm), !is.na(parent_age_min), !is.na(lfpr), !is.na(PI_PC)))
summary(TabB6_B3)
TabB6_B3$N
temp_n<-  rounding_rules_counts(TabB6_B3$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44,parent_female == 1,  real_wages < wage_cap, !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$second_gen, na.rm=T ),4)
temp_results <- tidy(TabB6_B3) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl,  table = "B2", column ="B3" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset


TabB6_B4 <- felm(parent_age_min ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm+nonattainment_infant |state_year+county_factor+year_factor+month+survey_year_by_year | 0  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_B4)
TabB6_B4$N
temp_n<-  rounding_rules_counts(TabB6_B4$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$parent_age_min, na.rm=T ),4)
temp_results <- tidy(TabB6_B4) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl,  table = "B2", column ="B4" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

TabB6_B5 <- felm(married ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm+nonattainment_infant |state_year+county_factor+year_factor+month+survey_year_by_year | 0  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_B5)
TabB6_B5$N

temp_n<-  rounding_rules_counts(TabB6_B5$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$married, na.rm=T ),4)
temp_results <- tidy(TabB6_B5) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl,  table = "B2", column ="B5" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset


TabB6_B6 <- felm(divorced ~factor(parent_race)+PI_PC*year_trend+
                   PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                   county_pop*year_trend + epop*I(year_trend^2) + 
                   total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                   tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm+nonattainment_infant |state_year+county_factor+year_factor+month+survey_year_by_year | 0  |county_factor, 
                 data=parent_piks_acs%>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC)))

summary(TabB6_B6)
TabB6_B6$N
temp_n<-  rounding_rules_counts(TabB6_B6$N)
temp_ctrl <- signif(mean((parent_piks_acs %>% filter(year%in%1969:1975, AGE >33 & AGE < 44, parent_female == 1, real_wages < wage_cap, !is.na(parent_age_min), !is.na(all_fullterm), !is.na(lfpr), !is.na(PI_PC),nonattainment_infant == 0))$divorced, na.rm=T ),4)
temp_results <- tidy(TabB6_B6) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl,  table = "B2", column ="B6" ) #add variables for N and control mean, plus index the table number and column
Table_B6 <- bind_rows(Table_B6, temp_results) #Appended to big dataset

write_csv(Table_B6, "/projects/opp_env/caa1970/JPE_Micro/Output/Table B6/Table_B6.csv")